/-
Copyright (c) 2024 Michael Rothgang. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Michael Rothgang
-/

import Lake.CLI.Main
import Lean.Elab.ParseImportsFast
import Batteries.Data.String.Basic
import Mathlib.Tactic.Linter.TextBased
import Cli.Basic

/-!
# Text-based style linters

This file defines the `lint-style` executable which runs all text-based style linters.
The linters themselves are defined in `Mathlib/Tactic/Linter/TextBased.lean`.

In addition, this checks that
- `Mathlib.Init` is (transitively) imported in all of mathlib, and
- every file in `scripts` is documented in its top-level README.
-/

open Cli Lean.Linter Mathlib.Linter.TextBased System.FilePath

/-- Additional imports generated by `mk_all`. -/
def explicitImports : Array Lean.Name := #[`Batteries, `Std]

/-- Remove the additional imports generated by `mk_all` so that only mathlib modules remain. -/
def eraseExplicitImports (names : Array Lean.Name) : Array Lean.Name :=
  explicitImports.foldl Array.erase names

/-- Get the root package of the Lake workspace we are running in. -/
def getWorkspaceRoot : IO Lake.Package := do
  let (elanInstall?, leanInstall?, lakeInstall?) ← Lake.findInstall?
  let config ← Lake.MonadError.runEIO <| Lake.mkLoadConfig { elanInstall?, leanInstall?, lakeInstall? }
  let some workspace ← Lake.loadWorkspace config |>.toBaseIO
    | throw <| IO.userError "failed to load Lake workspace"
  return workspace.root

/-- Convert the options that Lake knows into the option that Lean knows. -/
def toLeanOptions (opts : Array Lake.LeanOption) : Lean.Options :=
  opts.foldl (init := Lean.Options.empty) fun opts o =>
    -- Strip off the `weak.` prefix, like Lean does when parsing command line arguments.
    if o.name.getRoot == `weak
    then
      opts.insert (o.name.replacePrefix `weak Lean.Name.anonymous) o.value.toDataValue
    else
      opts.insert o.name o.value.toDataValue

/-- Determine the `Lean.Options` from the Lakefile of the current project.

We have to do this since style linters do not run in the `CoreM`/`CommandElabM` monads,
and so they do not get access to the options in scope.

Please do not confuse this with the Lean options at the moment that `lint-style` was compiled.
-/
def getLakefileLeanOptions : IO Lean.Options := do
  let root ← getWorkspaceRoot
  -- Some projects declare options in the root package.
  let rootOpts := root.leanOptions
  -- Other projects, like Mathlib, declare options in the targets.
  -- Here we use the default targets, since that probably contains the modules we'll be linting.
  let defaultOpts := root.defaultTargets.flatMap fun target ↦
    if let some lib := root.findLeanLib? target then
      lib.config.leanOptions
    else if let some exe := root.findLeanExe? target then
      exe.config.leanOptions
    else
      #[]
  return toLeanOptions (rootOpts ++ defaultOpts)

/-- Check that `Mathlib.Init` is transitively imported in all of Mathlib -/
register_option linter.checkInitImports : Bool := { defValue := false }

/-- Check that `Mathlib.Init` is transitively imported in all of Mathlib.
Moreover, every file imported in `Mathlib.Init` should in turn import the `Header` linter
(except for the header linter itself, of course).
Return the number of modules which violated one of these rules.
-/
def missingInitImports (opts : LinterOptions) : IO Nat := do
  unless getLinterValue linter.checkInitImports opts do return 0

  -- Find any file in the Mathlib directory which does not contain any Mathlib import.
  -- We simply parse `Mathlib.lean`, as CI ensures this file is up to date.
  let allModuleNames := eraseExplicitImports (← findImports "Mathlib.lean")
  let mut modulesWithoutMathlibImports := #[]
  let mut importsHeaderLinter := #[]
  for module in allModuleNames do
    let path := System.mkFilePath (module.components.map fun n ↦ n.toString)|>.addExtension "lean"
    let imports ← findImports path
    let hasNoMathlibImport := imports.all fun name ↦ name.getRoot != `Mathlib
    if hasNoMathlibImport then
      modulesWithoutMathlibImports := modulesWithoutMathlibImports.push module
    if imports.contains `Mathlib.Tactic.Linter.Header then
      importsHeaderLinter := importsHeaderLinter.push module

  -- Every file importing the `header` linter should be imported in `Mathlib/Init.lean` itself.
  -- (Downstream files should import `Mathlib.Init` and not the header linter.)
  -- The only exception are auto-generated import-only files.
  let initImports ← findImports ("Mathlib" / "Init.lean")
  let mismatch := importsHeaderLinter.filter (fun mod ↦
    ![`Mathlib, `Mathlib.Tactic, `Mathlib.Init].contains mod && !initImports.contains mod)
    -- This file is transitively imported by `Mathlib.Init`.
    |>.erase `Mathlib.Tactic.DeclarationNames
  if mismatch.size > 0 then
    IO.eprintln s!"error: the following {mismatch.size} module(s) import the `header` linter \
      directly, but should import Mathlib.Init instead: {mismatch}\n\
      The `header` linter is included in Mathlib.Init, and every file in Mathlib \
      should import Mathlib.Init.\nPlease adjust the imports accordingly."
    return mismatch.size

  -- Now, it only remains to check that every module (except for the Header linter itself)
  -- imports some file in Mathlib.
  let missing := modulesWithoutMathlibImports.erase `Mathlib.Tactic.Linter.Header
    -- This file is imported by `Mathlib/Tactic/Linter/Header.lean`.
    |>.erase `Mathlib.Tactic.Linter.DirectoryDependency
  if missing.size > 0 then
    IO.eprintln s!"error: the following {missing.size} module(s) do not import Mathlib.Init: \
      {missing}"
    return missing.size
  return 0

/-- Verify that every file in the `scripts` directory is documented in `scripts/README.md` -/
register_option linter.allScriptsDocumented : Bool := { defValue := false }

/-- Verifies that every file in the `scripts` directory is documented in `scripts/README.md`.
Return the number of undocumented scripts. -/
def undocumentedScripts (opts : LinterOptions) : IO Nat := do
  unless getLinterValue linter.allScriptsDocumented opts do return 0

  -- Retrieve all scripts (except for the `bench` directory).
  let allScripts ← (walkDir "scripts" fun p ↦ pure (p.components.getD 1 "" != "bench"))
  let allScripts := allScripts.erase ("scripts" / "bench")|>.erase ("scripts" / "README.md")
  -- Check if the README text contains each file enclosed in backticks.
  let readme : String ← IO.FS.readFile ("scripts" / "README.md")
  -- These are data files for linter exceptions: don't complain about these *for now*.
  let dataFiles := #["noshake.json", "nolints-style.txt"]
  -- For now, there are no scripts in sub-directories that should be documented.
  let fileNames := allScripts.map (·.fileName.get!)
  let undocumented := fileNames.filter fun script ↦
    !readme.containsSubstr s!"`{script}`" && !dataFiles.contains script
  if undocumented.size > 0 then
    IO.println s!"error: found {undocumented.size} undocumented script(s): \
      please describe the script(s) in 'scripts/README.md'\n  \
      {String.intercalate "," undocumented.toList}"
  return undocumented.size

/-- Implementation of the `lint-style` command line program. -/
def lintStyleCli (args : Cli.Parsed) : IO UInt32 := do
  -- Use the environment declared in Mathlib.Tactic.Linter.TextBased to determine the linter sets.
  Lean.initSearchPath (← Lean.findSysroot)
  let sets ← unsafe Lean.withImportModules #[{module := `Mathlib.Tactic.Linter.TextBased}] {}
    fun env => pure <| linterSetsExt.getState env

  let opts : LinterOptions := {
    toOptions := ← getLakefileLeanOptions,
    linterSets := sets,
  }

  let style : ErrorFormat := match args.hasFlag "github" with
    | true => ErrorFormat.github
    | false => ErrorFormat.humanReadable
  let fix := args.hasFlag "fix"
  -- If no modules are specified, use the defaults from the Lakefile.
  let originModules ← match args.variableArgsAs! String with
  | #[] =>
    -- If none are specified, lint the default Lake modules.
    let (elanInstall?, leanInstall?, lakeInstall?) ← Lake.findInstall?
    let config ← Lake.MonadError.runEIO <| Lake.mkLoadConfig { elanInstall?, leanInstall?, lakeInstall? }
    let some workspace ← Lake.loadWorkspace config |>.toBaseIO
      | throw <| IO.userError "failed to load Lake workspace"
    pure <| workspace.root.defaultTargets.flatMap fun target =>
      if let some lib := workspace.root.findLeanLib? target then
        lib.roots
      else if let some exe := workspace.root.findLeanExe? target then
        #[exe.config.root]
      else
        #[]
  | mods => do
    let mut result := #[]
    for mod in mods do
      let modParse := Lean.ParseImports.moduleIdent mod {}
      match modParse.error? with
      | none => result := result.append <| modParse.imports.map Lean.Import.module
      | some err => throw <| IO.userError s!"could not parse module name {mod}: {err}"
    pure result
  -- Get all the imports, but only those in the same package.
  let pkgs := originModules.map (·.components.head!)
  Lean.initSearchPath (← Lean.findSysroot)
  let searchPath ← Lean.getSrcSearchPath
  let allModuleNames ← originModules.flatMapM fun mod => do
    let imports ← match ← searchPath.findWithExt "lean" mod with
    | some file => findImports file
    | none => throw <| IO.userError s!"could not find module with name {mod}"
    pure <| imports.filter (·.components.head! ∈ pkgs)

  -- Read the `nolints` file, with manual exceptions for the linter.
  -- NB. We pass these lints to `lintModules` explicitly to prevent cache invalidation bugs:
  -- if the text-based linter read the file itself, replaying a cached build of that
  -- file could re-use an outdated version of the nolints file.
  -- (For syntax linters, such a bug actually occurred in mathlib.)
  -- This script is re-run each time, hence is immune to such issues.
  let filename : System.FilePath := ("scripts" / "nolints-style.txt")
  let nolints ← try
    IO.FS.lines filename
  catch _ =>
    IO.eprintln s!"warning: nolints file could not be read; treating as empty: {filename}"
    pure #[]
  let numberErrors := (← lintModules opts nolints allModuleNames style fix)
    + (← missingInitImports opts) + (← undocumentedScripts opts) + (← modulesNotUpperCamelCase opts allModuleNames)
  -- If run with the `--fix` argument, return a zero exit code.
  -- Otherwise, make sure to return an exit code of at most 125,
  -- so this return value can be used further in shell scripts.
  if args.hasFlag "fix" then
    return 0
  else return min numberErrors 125

/-- Setting up command line options and help text for `lake exe lint-style`. -/
-- so far, no help options or so: perhaps that is fine?
def lintStyle : Cmd := `[Cli|
  «lint-style» VIA lintStyleCli; ["0.0.1"]
  "Run text-based style linters on every Lean file in specified modules.
  Print errors about any unexpected style errors to standard output."

  FLAGS:
    github;     "Print errors in a format suitable for github problem matchers\n\
                 otherwise, produce human-readable output"
    fix;        "Automatically fix the style error, if possible"

  ARGS:
    ...modules : String; "Which modules, and their imports, will be linted.\n\
                          If no modules are specified, the linter runs on the default Lake module(s)."
]

/-- The entry point to the `lake exe lint-style` command. -/
def main (args : List String) : IO UInt32 := do lintStyle.validate args
